setwd('~/Dropbox/Apps/Overleaf/Purge')

###############################################
###############################################
###############################################

# Figure 2: Grain Retention Levels ----

source('code/pre_analysis.R')

plot <- df[, .('retain' = mean(retain, na.rm = T)), by = year] %>% 
  ggplot(aes(x = year, y = retain)) + geom_hline(yintercept = 165, linetype = 1) +
  geom_vline(xintercept = 1957.5) + geom_point(size = 2.3) + geom_line() +
  labs(x = 'Year', y = 'Grain retention per capita (kg)') + 
  scale_x_continuous(n.breaks = 8) + scale_y_continuous(expand = expansion(mult = c(.2, .2))) + mytheme()

ggsave(plot, width = 5, height = 4, filename = 'figure/fig_2.pdf')

###############################################
###############################################
###############################################

# Figure 3: Event Studies ----

source('code/pre_analysis.R')

eq <- 'sw(over_exc, ln_dr) ~ i(year, ln_purge, 1957) + (.[cov])*factor(year) | countyid + year'
m <- feols(as.formula(eq), cluster = ~ countyid, data = df)
dv_lab <- c("1" = "Over-extraction", "2" = "Log mortality")

plot <- iplot(m, sep = 0, plot_prms = list(col = NULL, lty = NULL))$prms %>% 
  ggplot(aes(x = x, y = y, ymin = ci_low, ymax = ci_high)) + 
  facet_wrap(~id, labeller = labeller(id = dv_lab), scales = 'free_y') + 
  geom_point(size = 2.5) + geom_errorbar(width = 0) + geom_line() + 
  geom_hline(yintercept = 0) + geom_vline(xintercept = 1957) +
  scale_x_continuous(n.breaks = 8) + 
  scale_y_continuous(n.breaks = 4, expand = expansion(mult = c(.3, .3))) + 
  labs(x = 'Year', y = 'Effects of purges') + mytheme()

ggsave(plot, height = 3, width = 7.5, filename = 'figure/fig_3.pdf')

###############################################
###############################################
###############################################

# Figure 4: Placebo Tests ----

source('code/pre_analysis.R')

eq <- 'sw(over_exc, ln_dr) ~ i(year, campaign, 1957) + (.[cov])*factor(year) | countyid + year'

placebo <- function(treatment){ 
  f <- as.formula(gsub('campaign', treatment, eq))
  m <- feols(f, cluster = ~ countyid, data = df)
  p <- iplot(m, sep = 0, plot_prms = list(col = NULL, lty = NULL))$prms
}

plot <- function(data){
  ggplot(data, aes(x = x, y = y, ymin = ci_low, ymax = ci_high)) + 
    facet_wrap(~id, labeller = labeller(id = dv_lab), scales = 'free_y') + 
    geom_point(size = 2.5) + geom_errorbar(width = 0) + geom_line() + 
    geom_hline(yintercept = 0) + geom_vline(xintercept = 1957) +
    scale_x_continuous(n.breaks = 8) + 
    scale_y_continuous(n.breaks = 4, expand = expansion(mult = c(.3, .3))) + 
    labs(x = 'Year', y = 'Effects of placebo repression') + mytheme()
}

dv_lab <- c("1" = "Over-extraction", "2" = "Log mortality")

p1 <- plot(placebo('log(zf_num_kill/100+1)')) + ggtitle('Panel A: The Suppression of Counterrevolutionaries') 
p2 <- plot(placebo('log(anticor)')) + ggtitle('Panel B: The Anti-Corruption Campaign')

ggsave((p1 / p2) + plot_layout(ncol = 1), height = 6.5, width = 7.5, filename = 'figure/fig_4.pdf')

###############################################
###############################################
###############################################

# Figure 5: RDD Jumps ----

source('code/pre_analysis.R')

rd[, dist_int := ave(dist, cut_width(dist, 15, boundary = 0), FUN = mean)] # create distance intervals
rd[, purge := log((purge_intp+1)/popu57*1000)][, death := ln_dr] # outcome variables

x <- rd[, .('dv' = mean(purge, na.rm = T)), by = dist_int][, lab := 1]
y <- rd[, .('dv' = mean(death, na.rm = T)), by = dist_int][, lab := 2]

d <- bind_rows(x, y) %>% mutate(side = ifelse(dist_int > 0, 1, 0)) %>% filter(between(dist_int, -300, 300))

dv_lab <- c('1' = 'Log purge victim density', '2' = 'Log death rate, 1960')

plot <- d %>%
    ggplot(aes(x = dist_int, y = dv, group = side)) + facet_wrap(~lab, labeller = labeller(lab = dv_lab), scales = 'free_y') +
    geom_point(col = 'gray80') + geom_smooth(method = "lm", formula = y~poly(x,1), se = F, color = 'black') +
    geom_vline(xintercept = 0, linewidth = .4) + scale_y_continuous(n.breaks = 7) +
    labs(x = 'Distance to the military border (km)') + mytheme() + theme(axis.title.y = element_blank()) 
    
ggsave(plot, width = 7, height = 3.5, filename = 'figure/fig_5.pdf')

###############################################
###############################################
###############################################

# Figure 6: Selection Effect ----

source('code/pre_analysis.R')

eq <- 'sw(turnover, ht_north) ~ i(year, ln_purge, 1957) + (.[cov])*factor(year) | countyid + year'
m <- feols(formula(eq), cluster = ~countyid, data = df)

out <- bind_rows(iplot(m[[1]], plot_prms = list(col = NULL, lty = NULL))$prms %>% mutate(dv = 'Leader turnover'),
                 iplot(m[[2]], plot_prms = list(col = NULL, lty = NULL))$prms %>% mutate(dv = 'Northern leader'))

plot <- out %>%
  ggplot(aes(x = x, y = y, ymin = ci_low, ymax = ci_high)) + facet_wrap(~dv, scales = 'free_y') +
  geom_point(size = 2.5) + geom_errorbar(width = 0) + geom_line() + 
  geom_vline(xintercept = 1957) + geom_hline(yintercept = 0) +
  scale_x_continuous(n.breaks = 8) + scale_y_continuous(expand = expansion(mult = c(.6, .6)), n.breaks = 4) +
  labs(x = 'Year', y = 'Effects of purges') + mytheme()

ggsave(plot, width = 7, height = 3.2, filename = 'figure/fig_6.pdf')

###############################################
###############################################
###############################################

# Table 1: Descriptive Statistics ----

source('code/pre_analysis.R')

sum_stat <- function(data){
  data.frame(
    "N" = apply(data, 2, function(x) sum(!is.na(x))),
    "Mean" = apply(data, 2, function(x) mean(x, na.rm = T)),
    "SD" = apply(data, 2, function(x) sd(x, na.rm = T)),
    "Min" = apply(data, 2, function(x) min(x, na.rm = T)),
    "Max" = apply(data, 2, function(x) max(x, na.rm = T))
  ) %>% return()
}

su <- rbind(sum_stat(df[year == 1957, .(ln_purge, ln_purge_pc)]), 
            sum_stat(df[, .(over_exc, ln_dr)]),
            sum_stat(df[year == 1957, ..cov_cnty]))

rownames(su) <- plyr::mapvalues(rownames(su), from = labels(lab), to = lab, warn_missing = F)

su %>% kbl(digits = 2, format = "latex", booktabs = T) %>%
  pack_rows("Explanatory variables", 1, 2, italic = T, latex_gap_space = '4mm') %>%
  pack_rows("Outcome variables", 3, 4, italic = T, latex_gap_space = '4mm') %>%
  pack_rows("County-level covariates", 5, nrow(su), italic = T, latex_gap_space = '4mm') %>%
  save_kable("table/tbl_1.tex")

###############################################
###############################################
###############################################

# Table 2: Baseline ----

source('code/pre_analysis.R')

f <- 'sw(over_exc, ln_dr) ~ sw(ln_purge*glf, ln_purge_pc*glf) + (.[cov])*glf | countyid + year'

m <- feols(formula(f), cluster = ~ countyid, data = df)

etable(m, group = list('PSC covariates $\\times$ GLF' = 'psc_'), order = 'victim', dict = lab, replace = T, file = 'table/tbl_2.tex')

###############################################
###############################################
###############################################

# Table 3: RDD Balance ----

source('code/pre_analysis.R')

n <- feols(sw(.[, rd_var]) ~ south + dist_abs*south | floor(long) + floor(lati) + EPROV, 
  cluster = ~grid, data = rd, subset = ~dist_abs <= 100, weight = ~1/dist_abs)

extract <- function(est, bandwidth){
  lapply(est, function(x) tidy(x)) %>% bind_rows() %>% filter(term == 'south') %>% 
    mutate(n = lapply(est, function(x) x$nobs) %>% unlist(), 
           ave = apply(rd[dist_abs <= bandwidth, rd_var, with = F], 2, function(x) mean(x, na.rm = T))) %>% 
    select(ave, estimate, p.value, n) %>% return()
}

out <- extract(n, bandwidth = 100)
out <- out %>% mutate(var = plyr::mapvalues(rd_var, from = labels(lab), to = lab, warn_missing = F))
col <- c('Variable', c('Mean', 'Coef.', 'p-value', 'N'))

out %>% relocate(var) %>%
  kbl(digits = 2, format = 'latex', booktabs = T, col.names = col) %>%
  pack_rows("Political characteristics", 1, 3, italic = T, latex_gap_space = '4mm') %>%
  pack_rows("Socioeconomic characteristics", 4, 9, italic = T, latex_gap_space = '4mm') %>%
  pack_rows("Geographical characteristics", 10, 13, italic = T, latex_gap_space = '4mm') %>%
  save_kable('table/tbl_3.tex')

###############################################
###############################################
###############################################

# Table 4: RDD Estimates ----

source('code/pre_analysis.R')

rdd <- function(DV, FZ, P, BW){
  d <- rd
  dv <- d[, ..DV] %>% as.vector() %>% unlist()
  fz <- d[, ..FZ] %>% as.vector() %>% unlist()
  est <- rdrobust(y = dv, x = d$dist, fuzzy = fz, p = P, h = BW, covs = rdfe, cluster = d$grid, weights = 1/d$dist_abs)
  result <- tibble(poly = P, coef = est$coef[1], se = est$se[1], pval = est$pv[1], bw = est$bws[1,1], n = sum(est$N_h)) %>% as.data.frame()
  return(result)
}

out <- rbind(rdd(DV = 'ln_purge', FZ = NULL, P = 1, BW = NULL), # first stage
             rdd(DV = 'ln_dr', FZ = NULL, P = 1, BW = NULL), # reduced form
             rdd(DV = 'ln_dr', FZ = 'ln_purge', P = 1, BW = NULL)) %>% data.table() # 2SLS

out[, reg := c('First-stage', 'Reduced-form', 'Two-stage')]
out[, dv := c('Log purge victims', rep('Log mortality, 1960', 2))]
col <- c('Regression', 'Outcome variable', 'Coefficient', 'Std. Error', 'Bandwidth', 'N')

out %>%  
  mutate(bw = round(bw)) %>% relocate(reg, dv) %>% select(reg, dv, coef, se, bw, n) %>%
  kbl(digits = 2, format = 'latex', booktabs = T, col.names = col) %>%
  column_spec(1, bold = T, italic = T) %>%
  save_kable('table/tbl_4.tex')

###############################################
###############################################
###############################################

# Table 5: Purge and Satellites ----

source('code/pre_analysis.R')

df[, w := sum(sat_all), by = year]

f <- 'sw(sat_all, sat_grn) ~ sw(ln_purge*glf, ln_purge_pc*glf) + (.[cov])*glf | countyid + year'

m <- feols(formula(f), cluster = ~ countyid, data = df, weights = ~w)

etable(m, group = list('Covariates $\\times$ GLF' = '!victim'), dict = lab, replace = T, file = 'table/tbl_5.tex')

###############################################
###############################################
###############################################

